{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = 'all'  # default is 'last_expr'\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('/Users/siyuyang/Source/repos/GitHub_MSFT/CameraTraps')  # append this repo to PYTHONPATH"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from collections import Counter, defaultdict\n",
    "from random import sample\n",
    "import math\n",
    "\n",
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "\n",
    "from data_management.megadb.schema import sequences_schema_check\n",
    "from data_management.annotations.add_bounding_boxes_to_megadb import *\n",
    "from data_management.megadb.converters.cct_to_megadb import make_cct_embedded, process_sequences, write_json"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Combine Sul Ross kitfox with bbox entries"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Give the path to the JSON file where the output of this notebook will be written. You can then take this file to the .NET app for ingestion into the database."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "path_to_output = '/Users/siyuyang/OneDrive - Microsoft/AI4Earth/CameraTrap/Databases/megadb_2020/sulross_kitfox_combined_megadb.json'  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Name of the dataset**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_name = 'sulross_kitfox'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 0 - Add an entry to the `datasets` table\n",
    "\n",
    "This dataset is already in the table, with a few entries that carry human and vehicle bounding box labels. We need to combine those entries with the newly available class labels."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/Users/siyuyang/OneDrive - Microsoft/AI4Earth/CameraTrap/Databases/megadb_batches_9_10_11/sulross_kitfox_w_batch_10_boxes.json') as f:\n",
    "    existing_entries = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "len(existing_entries)\n",
    "existing_entries"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1 - Prepare the `sequence` objects to insert into the database"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 1b - If you're starting from scratch..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "label_folder = '/Users/siyuyang/OneDrive - Microsoft/AI4Earth/CameraTrap/Engagements/SulRoss/share_microsoft/KitFoxLabels'\n",
    "\n",
    "# some folders were renamed, so using the API output to map to path in blob storage\n",
    "api_output_path = '/Users/siyuyang/Source/temp_data/CameraTrap/engagements/SulRoss/20190619_kitfox/detector_output/detections_kitfox_20190620.json'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(api_output_path) as f:\n",
    "    detection_res = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# map the base file name of each image in the detector output to its full path;\n",
    "# if a base name occurs more than once, the last path wins\n",
    "image_name_to_path = {\n",
    "    os.path.basename(entry['file']): entry['file']\n",
    "    for entry in detection_res['images']\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "del detection_res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Read every label CSV in label_folder and stack them into one table.\n",
    "# Only names ending in .csv are read, so other entries in the folder are\n",
    "# skipped, and the names are sorted because os.listdir() returns them in\n",
    "# arbitrary order - this keeps the row order the same on every run.\n",
    "csv_files = sorted(\n",
    "    name for name in os.listdir(label_folder) if name.lower().endswith('.csv')\n",
    ")\n",
    "\n",
    "li = []\n",
    "for csv_file in csv_files:\n",
    "    print(csv_file)\n",
    "    csv_path = os.path.join(label_folder, csv_file)\n",
    "    df = pd.read_csv(csv_path, index_col=None, header=0)\n",
    "    li.append(df)\n",
    "\n",
    "timelapse_df = pd.concat(li, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3393908"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(timelapse_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "File            object\n",
       "RelativePath    object\n",
       "Folder          object\n",
       "Date            object\n",
       "Time            object\n",
       "ImageQuality    object\n",
       "DeleteFlag        bool\n",
       "County          object\n",
       "Survey          object\n",
       "Analyst         object\n",
       "Notes           object\n",
       "Publicity         bool\n",
       "Empty             bool\n",
       "Person            bool\n",
       "Animal            bool\n",
       "Species         object\n",
       "species2        object\n",
       "species3        object\n",
       "Unnamed: 18     object\n",
       "dtype: object"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "timelapse_df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# errors='ignore' keeps this cell re-runnable after the column has been dropped\n",
    "timelapse_df = timelapse_df.drop(columns='Unnamed: 18', errors='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "timelapse_df.sample(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "3393908it [07:32, 7503.91it/s]\n"
     ]
    }
   ],
   "source": [
    "# File names from the label CSVs that have no entry in image_name_to_path.\n",
    "# Only the 'File' column is needed, so iterate over it directly instead of\n",
    "# calling iterrows(), which builds a Series for every row.\n",
    "entries_im_not_stored = [\n",
    "    fn for fn in tqdm(timelapse_df['File']) if fn not in image_name_to_path\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "124"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(entries_im_not_stored)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Build one image-level entry per labelled row whose file name has an entry in\n",
    "# image_name_to_path.\n",
    "\n",
    "embedded = []  # list of images with all attributes at the image-level\n",
    "unidentified_animal = []  # rows flagged Animal that have no usable species label\n",
    "\n",
    "for i_row, row in tqdm(timelapse_df.iterrows(), total=len(timelapse_df)):\n",
    "    fn = row['File']\n",
    "    if fn not in image_name_to_path:\n",
    "        continue\n",
    "    path = image_name_to_path[fn]\n",
    "\n",
    "    # using the file name only to determine the seq_id, frame_num and location,\n",
    "    # e.g. Reeves002__Cam001__2019-02-19__14-21-54(1).JPG gives\n",
    "    #   seq_id    Reeves002__Cam001__2019-02-19__14-21\n",
    "    #   frame_num 1\n",
    "    #   location  Reeves002__Cam001\n",
    "    seq_id = '-'.join(fn.split('-')[:-1])\n",
    "    frame_num = int(fn.split('(')[1].split(')')[0])\n",
    "    name_parts = fn.split('__')\n",
    "    location = name_parts[0] + '__' + name_parts[1]\n",
    "\n",
    "    # other attributes from the csv columns\n",
    "    date_time = row['Date'] + ' ' + row['Time']\n",
    "    has_person = row['Person']\n",
    "    has_animal = row['Animal']\n",
    "\n",
    "    # The Empty column is deliberately not used. Rows exist where Empty is True\n",
    "    # although Person is set (the one sample looked at did show a person), and\n",
    "    # where certain bird species are recorded but Empty is still True.\n",
    "\n",
    "    # Normalize the species labels (e.g. _skunk to skunk). A label that\n",
    "    # normalizes to 'none' means that no species was recorded in that column,\n",
    "    # whichever of the three columns it appears in.\n",
    "    animal_classes = []\n",
    "    for species_col in ('Species', 'species2', 'species3'):\n",
    "        raw_label = row[species_col]\n",
    "        if pd.isnull(raw_label):\n",
    "            continue\n",
    "        label = raw_label.lower().replace('_', '')\n",
    "        if label != 'none':\n",
    "            animal_classes.append(label)\n",
    "\n",
    "    if has_animal and len(animal_classes) == 0:\n",
    "        unidentified_animal.append(row)\n",
    "        animal_classes.append('unidentified')\n",
    "\n",
    "    if has_person:\n",
    "        animal_classes.append('human')\n",
    "\n",
    "    if len(animal_classes) == 0:\n",
    "        animal_classes = ['empty']\n",
    "    else:\n",
    "        # de-duplicate; sorted so that the label order is the same on every run\n",
    "        animal_classes = sorted(set(animal_classes))\n",
    "\n",
    "    embedded.append({\n",
    "        'file': path,\n",
    "        'seq_id': seq_id,\n",
    "        'frame_num': frame_num,\n",
    "        'location': location,\n",
    "        'datetime': date_time,\n",
    "        'class': animal_classes\n",
    "    })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The dataset_name is set to sulross_kitfox. Please make sure this is correct!\n",
      "Making a deep copy of docs...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "  0%|          | 0/3393784 [00:00<?, ?it/s]\u001b[A\n",
      "  2%|▏         | 79824/3393784 [00:00<00:04, 793629.56it/s]\u001b[A\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Putting 3393784 images into sequences...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  5%|▍         | 158572/3393784 [00:00<00:04, 790011.91it/s]\u001b[A\n",
      "  7%|▋         | 238150/3393784 [00:00<00:03, 791456.49it/s]\u001b[A\n",
      "  9%|▉         | 301941/3393784 [00:00<00:04, 753357.48it/s]\u001b[A\n",
      " 11%|█         | 367793/3393784 [00:00<00:04, 734387.67it/s]\u001b[A\n",
      " 13%|█▎        | 451151/3393784 [00:00<00:03, 751144.80it/s]\u001b[A\n",
      " 16%|█▌        | 538399/3393784 [00:00<00:03, 768449.88it/s]\u001b[A\n",
      " 18%|█▊        | 615771/3393784 [00:00<00:03, 769127.23it/s]\u001b[A\n",
      " 21%|██        | 697981/3393784 [00:00<00:03, 775027.24it/s]\u001b[A\n",
      " 23%|██▎       | 789308/3393784 [00:01<00:03, 788850.60it/s]\u001b[A\n",
      " 26%|██▌       | 878227/3393784 [00:01<00:03, 797963.13it/s]\u001b[A\n",
      " 28%|██▊       | 962848/3393784 [00:01<00:03, 801977.03it/s]\u001b[A\n",
      " 31%|███       | 1049265/3393784 [00:01<00:02, 806754.54it/s]\u001b[A\n",
      " 33%|███▎      | 1133775/3393784 [00:01<00:02, 797605.17it/s]\u001b[A\n",
      " 36%|███▌      | 1213904/3393784 [00:01<00:02, 795824.16it/s]\u001b[A\n",
      " 38%|███▊      | 1297376/3393784 [00:01<00:02, 798200.55it/s]\u001b[A\n",
      " 41%|████      | 1388643/3393784 [00:01<00:02, 804842.06it/s]\u001b[A\n",
      " 43%|████▎     | 1473626/3393784 [00:01<00:02, 807313.25it/s]\u001b[A\n",
      " 46%|████▌     | 1560281/3393784 [00:01<00:02, 810394.99it/s]\u001b[A\n",
      " 48%|████▊     | 1645154/3393784 [00:02<00:02, 811882.09it/s]\u001b[A\n",
      " 51%|█████     | 1729779/3393784 [00:02<00:02, 813403.47it/s]\u001b[A\n",
      " 53%|█████▎    | 1814342/3393784 [00:02<00:01, 803061.37it/s]\u001b[A\n",
      " 56%|█████▌    | 1893545/3393784 [00:02<00:01, 803189.19it/s]\u001b[A\n",
      " 58%|█████▊    | 1972301/3393784 [00:02<00:01, 800597.67it/s]\u001b[A\n",
      " 61%|██████    | 2053677/3393784 [00:02<00:01, 801138.49it/s]\u001b[A\n",
      " 63%|██████▎   | 2134956/3393784 [00:02<00:01, 801625.70it/s]\u001b[A\n",
      " 65%|██████▌   | 2220259/3393784 [00:02<00:01, 803491.47it/s]\u001b[A\n",
      " 68%|██████▊   | 2301455/3393784 [00:02<00:01, 803440.98it/s]\u001b[A\n",
      " 70%|███████   | 2382361/3393784 [00:03<00:01, 791301.74it/s]\u001b[A\n",
      " 73%|███████▎  | 2466359/3393784 [00:03<00:01, 792863.78it/s]\u001b[A\n",
      " 75%|███████▌  | 2551966/3393784 [00:03<00:01, 794833.18it/s]\u001b[A\n",
      " 78%|███████▊  | 2633106/3393784 [00:03<00:00, 795333.09it/s]\u001b[A\n",
      " 80%|███████▉  | 2713329/3393784 [00:03<00:00, 795538.83it/s]\u001b[A\n",
      " 82%|████████▏ | 2793246/3393784 [00:03<00:00, 794057.18it/s]\u001b[A\n",
      " 85%|████████▍ | 2871620/3393784 [00:03<00:00, 793634.14it/s]\u001b[A\n",
      " 87%|████████▋ | 2949875/3393784 [00:03<00:00, 793046.68it/s]\u001b[A\n",
      " 89%|████████▉ | 3034498/3393784 [00:03<00:00, 794376.23it/s]\u001b[A\n",
      " 92%|█████████▏| 3121004/3393784 [00:03<00:00, 796208.03it/s]\u001b[A\n",
      " 94%|█████████▍| 3202894/3393784 [00:04<00:00, 795582.09it/s]\u001b[A\n",
      " 97%|█████████▋| 3283461/3393784 [00:04<00:00, 794215.74it/s]\u001b[A\n",
      " 99%|█████████▉| 3364752/3393784 [00:04<00:00, 794641.87it/s]\u001b[A\n",
      "100%|██████████| 3393784/3393784 [00:04<00:00, 791081.17it/s]\u001b[A"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of sequences: 1020306\n",
      "Checking the location field...\n",
      "Checking which fields in a CCT image entry are sequence-level...\n",
      "\n",
      "all_img_properties\n",
      "{'file', 'location', 'datetime', 'frame_num', 'class'}\n",
      "\n",
      "img_level_properties\n",
      "{'file', 'frame_num', 'class', 'datetime'}\n",
      "\n",
      "image-level properties that really should be sequence-level\n",
      "{'location'}\n",
      "\n",
      "Finished processing sequences.\n",
      "Example sequence items:\n",
      "\n",
      "{'seq_id': 'Reeves002__Cam001__2019-02-19__14-21', 'dataset': 'sulross_kitfox', 'images': [{'file': 'Reeves002/Cam001/Reeves002__Cam001__2019-02-19__14-21-54(1).JPG', 'frame_num': 1, 'datetime': '19-Feb-19 14:21:54', 'class': ['empty']}, {'file': 'Reeves002/Cam001/Reeves002__Cam001__2019-02-19__14-21-59(2).JPG', 'frame_num': 2, 'datetime': '19-Feb-19 14:21:59', 'class': ['empty']}], 'location': 'Reeves002__Cam001'}\n",
      "\n",
      "[{'seq_id': 'Hudspeth001__Cam010__2018-08-06__14-09', 'dataset': 'sulross_kitfox', 'images': [{'file': 'Hudspeth001/Cam010/Hudspeth001__Cam010__2018-08-06__14-09-02(1).JPG', 'frame_num': 1, 'datetime': '6-Aug-18 14:09:02', 'class': ['empty']}, {'file': 'Hudspeth001/Cam010/Hudspeth001__Cam010__2018-08-06__14-09-12(2).JPG', 'frame_num': 2, 'datetime': '6-Aug-18 14:09:12', 'class': ['empty']}, {'file': 'Hudspeth001/Cam010/Hudspeth001__Cam010__2018-08-06__14-09-18(3).JPG', 'frame_num': 3, 'datetime': '6-Aug-18 14:09:18', 'class': ['empty']}, {'file': 'Hudspeth001/Cam010/Hudspeth001__Cam010__2018-08-06__14-09-25(4).JPG', 'frame_num': 4, 'datetime': '6-Aug-18 14:09:25', 'class': ['empty']}, {'file': 'Hudspeth001/Cam010/Hudspeth001__Cam010__2018-08-06__14-09-40(5).JPG', 'frame_num': 5, 'datetime': '6-Aug-18 14:09:40', 'class': ['empty']}, {'file': 'Hudspeth001/Cam010/Hudspeth001__Cam010__2018-08-06__14-09-51(6).JPG', 'frame_num': 6, 'datetime': '6-Aug-18 14:09:51', 'class': ['empty']}, {'file': 'Hudspeth001/Cam010/Hudspeth001__Cam010__2018-08-06__14-09-57(7).JPG', 'frame_num': 7, 'datetime': '6-Aug-18 14:09:57', 'class': ['empty']}], 'location': 'Hudspeth001__Cam010'}]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sequences = process_sequences(embedded, dataset_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# clean up species a bit: remove the 'none' label from images not marked empty\n",
    "\n",
    "for seq in sequences:\n",
    "    for im in seq['images']:\n",
    "        labels = im['class']\n",
    "        if labels[0] == 'empty' or 'none' not in labels:\n",
    "            continue\n",
    "        im['class'] = [label for label in labels if label != 'none']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "# every class label that appears on an image whose first label is not 'empty'\n",
    "species_present = {\n",
    "    label\n",
    "    for seq in sequences\n",
    "    for im in seq['images']\n",
    "    if im['class'][0] != 'empty'\n",
    "    for label in im['class']\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "species_present"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Some sequences have non-unique frame numbers because their image entries are\n",
    "# duplicated. For those sequences, keep a single entry per image file.\n",
    "\n",
    "# Affected sequences as they were before de-duplication, kept for inspection\n",
    "# in the cells below.\n",
    "problem_seqs = []\n",
    "\n",
    "for seq in sequences:\n",
    "    frame_numbers = [im['frame_num'] for im in seq['images']]\n",
    "    if len(frame_numbers) != len(set(frame_numbers)):\n",
    "        # shallow copy: it keeps pointing at the original list of images\n",
    "        problem_seqs.append(dict(seq))\n",
    "\n",
    "        # keyed by file, so a later entry for the same file replaces an earlier one\n",
    "        im_dict = {im['file']: im for im in seq['images']}\n",
    "        seq['images'] = list(im_dict.values())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2297"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(problem_seqs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'Night/JeffDavis001/Cam004_messeduptimes/JeffDavis001__Cam004__2018-05-03__16-06-44(3).JPG' == 'Night/JeffDavis001/Cam004_messeduptimes/JeffDavis001__Cam004__2018-05-03__16-06-44(3).JPG'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "problem_seqs[100]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2 - Pass the schema check\n",
    "\n",
    "Once your metadata are in the MegaDB format for `sequence` items, we check that they conform to the format's schema.\n",
    "\n",
    "If the format conforms, the following messages will be printed:\n",
    "\n",
    "```\n",
    "Verified that the sequence items meet requirements not captured by the schema.\n",
    "Verified that the sequence items conform to the schema.\n",
    "```\n",
    "\n",
    "For large datasets, the second step will take some time (~ a minute). \n",
    "\n",
    "Otherwise there will be an error message describing what's wrong. Please fix the issues until all checks are passed. You might need to write some snippets of code to loop through the `sequence` items to understand which entries have problems."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Verified that the sequence items meet requirements not captured by the schema.\n",
      "Verified that the sequence items conform to the schema.\n"
     ]
    }
   ],
   "source": [
    "sequences_schema_check.sequences_schema_check(sequences)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 3 - Add the few bounding box entries we had"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "# all image entries of the existing sequences, in their original order\n",
    "existing_images = [\n",
    "    existing_im\n",
    "    for existing_seq in existing_entries\n",
    "    for existing_im in existing_seq['images']\n",
    "]\n",
    "\n",
    "# image file -> 'bbox' value, for existing images that have one\n",
    "im_file_to_bbox = {\n",
    "    existing_im['file']: existing_im['bbox']\n",
    "    for existing_im in existing_images\n",
    "    if 'bbox' in existing_im\n",
    "}\n",
    "\n",
    "# files of the existing images whose class includes 'vehicle'\n",
    "im_has_vehicle = [\n",
    "    existing_im['file']\n",
    "    for existing_im in existing_images\n",
    "    if 'vehicle' in existing_im['class']\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Carry the 'vehicle' label and the bounding boxes of the existing entries\n",
    "# over to the matching images in the new sequences.\n",
    "\n",
    "# im_has_vehicle is a list; a set avoids scanning it once for every image\n",
    "vehicle_files = set(im_has_vehicle)\n",
    "\n",
    "for seq in sequences:\n",
    "    for im in seq['images']:\n",
    "        im_file = im['file']\n",
    "\n",
    "        if im_file in vehicle_files:\n",
    "            if im['class'][0] == 'empty':\n",
    "                # 'vehicle' replaces 'empty' instead of being listed next to it\n",
    "                im['class'] = ['vehicle']\n",
    "            elif 'vehicle' not in im['class']:\n",
    "                im['class'].append('vehicle')\n",
    "\n",
    "        if im_file in im_file_to_bbox:\n",
    "            im['bbox'] = im_file_to_bbox[im_file]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "39"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# number of images across all sequences that carry a 'bbox' entry\n",
    "count_w_bbox = sum(\n",
    "    1 for seq in sequences for im in seq['images'] if 'bbox' in im\n",
    ")\n",
    "count_w_bbox"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sample some sequences to make sure things look good\n",
    "\n",
    "sample(sequences, 10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 4 - Save the `sequence` items to a file\n",
    "\n",
    "You can now take the resulting JSON file to the .NET application for bulk insertion into the database:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(path_to_output, 'w') as f:\n",
    "    json.dump(sequences, f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can check that the bounding box annotations and the paths to the images all survived by running `visualization/visualize_megadb.py` on the file exported above."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:cameratraps] *",
   "language": "python",
   "name": "conda-env-cameratraps-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
